import json
import os
import re
import threading
import time
import urllib.parse
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from lxml import etree

# Request headers passed to every requests.get call in this file; the
# User-Agent value is a desktop Chrome identification string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/102.0.0.0 Safari/537.36'
    ),
}

# Fetch the school-name index at import time. NOTE: this performs a network
# request whenever the module is loaded.
url = 'https://static-data.gaokao.cn/www/2.0/school/name.json'
res = requests.get(url, headers=headers)

# The body arrives as a JSON string; parse it so it can be used as a Python
# object instead of raw text.
res2 = json.loads(res.text)

# Debugging aids:
# print(res2['data'])
# print(res2)
# Escaped sequences such as '\\u5a04\\u5e95...' can be turned back into the
# Chinese school name with:
# str = '\\u5a04\\u5e95\\u5e7c\\u513f\\u5e08\\u8303\\u9ad8\\u7b49\\u4e13\\u79d1\\u5b66\\u6821'
# print(str.encode('utf8').decode('unicode_escape'))


# School page URL: https://www.gaokao.cn/school/3490  (the 3490 can be replaced with another school id)


# Image data for a school page: https://static-data.gaokao.cn/www/2.0/school/589/img/list.json  returns the data holding the image addresses
# NOTE: this request runs at import time; the response is stored in img_json.
url2 = 'https://static-data.gaokao.cn/www/2.0/school/589/img/list.json'
img_json = requests.get(url=url2, headers=headers)


# print(json.loads(img_json.text))


# In practice, once a school's id is known, the rest of its data can be fetched as well
# https://static-data.gaokao.cn/upload/school/201908/1560005035_6240_thumb.jpg


# https://static-data.gaokao.cn/upload/logo/862.jpg  the school's logo image


# https://static-data.gaokao.cn/www/2.0/school/name.json
def get_res(url, timeout=10):
    """Fetch *url* with the module-level ``headers`` and return the body text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without this a stalled server
            would block the caller indefinitely.

    Returns:
        The response body decoded to ``str`` (``Response.text``). The HTTP
        status code is not inspected.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response.text


def save(name, id, out_dir='D:\\deleteAnyTime\\爬虫获取的数据\\大学logo信息\\', timeout=10):
    """Download one school's logo and store it as ``<name>.jpg`` in *out_dir*.

    Args:
        name: School name; used as the file name (without extension).
        id: School id substituted into the logo URL. The parameter name
            shadows the builtin ``id`` but is kept for existing callers.
        out_dir: Directory that receives the image. It is created when
            missing. Defaults to the directory this script has always used.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. A success or failure line is printed for the school.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
        OSError: When the directory or file cannot be written.
    """
    logo_url = f'https://static-data.gaokao.cn/upload/logo/{id}.jpg'
    response = requests.get(url=logo_url, headers=headers, timeout=timeout)

    # A non-200 body (e.g. a 404 page) is not an image; writing it would leave
    # a corrupt .jpg behind while reporting success.
    if response.status_code != 200:
        print(name, "保存失败", response.status_code)
        return

    os.makedirs(out_dir, exist_ok=True)
    # Keep only the final path component so a name containing path separators
    # cannot place the file outside out_dir.
    file_name = os.path.basename(name + '.jpg')
    with open(os.path.join(out_dir, file_name), mode='wb') as f:
        f.write(response.content)
    print(name, "保存成功")


def get_logo_pic(school_list, max_workers=16):
    """Download the logo of every school in *school_list* concurrently.

    Args:
        school_list: Iterable of mappings, each providing a ``'name'`` and a
            ``'school_id'`` entry; both values are handed to ``save``.
        max_workers: Upper bound on simultaneous downloads, so a long list
            does not start one thread per school.

    Returns:
        None. Blocks until every download has finished or failed.

    Raises:
        KeyError: When an entry lacks ``'name'`` or ``'school_id'``.
    """
    print("线程开始")

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Read from the parameter rather than a module-level global so the
        # function works for any caller and any list.
        pending = {
            pool.submit(save, school['name'], school['school_id']): school['name']
            for school in school_list
        }
        for future in as_completed(pending):
            error = future.exception()
            if error is not None:
                # One failed download must not abort the remaining ones.
                print(pending[future], "保存失败:", error)


if __name__ == '__main__':
    # Script entry point: download the school index, then fetch the logo of
    # every entry under its 'data' key.
    log_url = 'https://static-data.gaokao.cn/www/2.0/school/name.json'
    res = get_res(log_url)
    payload = json.loads(res)
    school_id_list = payload['data']
    get_logo_pic(school_id_list)
